#checking nulls
percent_nulls(dat) %>%
arrange(desc(percent_null)) %>%
mutate(
percent_null = round(percent_null, 2)
) %>%
kable(
col.names = c("Variable", "Percent Missing"),
caption = "Percentage of Missing Values by Variable"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)
| Variable | Percent Missing |
|---|---|
| CD4 count(Last two months) | 98.74 |
| CD4 count(First two months) | 98.69 |
| EPTB Others | 98.26 |
| Culture S | 98.25 |
| Culture H | 98.21 |
| Culture E | 98.18 |
| Lipa Hain Isoniazid | 97.55 |
| Culture R | 97.44 |
| Lipa Hain Rifampicin) | 96.96 |
| TB LAM | 96.21 |
| Culture Date | 93.26 |
| Culture Result | 93.18 |
| Xray Date | 92.45 |
| GeneXpert Date | 89.26 |
| EPTB Sub Type | 85.96 |
| 0th Month Serial No and Quantification | 80.77 |
| 5th Month Serial No and Quantification | 76.90 |
| Sputum Smear Examination 0th Month Result Date | 76.26 |
| Sputam Smear Examination 5th Month Result Date | 75.52 |
| 6by8 Month Serial No and Quantification | 73.68 |
| Sputum Smear Examination 6by8 Month Result Date | 69.36 |
| Comorbidity | 68.30 |
| 2by3 Month Serial No and Quantification | 67.77 |
| Sputum Smear Examination 2by3 Month Result Date | 64.81 |
| Xray | 48.63 |
| GeneXpert | 1.32 |
| Sputum Smear Examination 2by3 Month Result | 0.69 |
| ownertype | 0.61 |
| Is Patient Prisoner | 0.36 |
| Sputum Smear Examination 6by8 Month Result | 0.31 |
| Sputam Smear Examination 5th Month Result | 0.31 |
| Treatment Outcome Date | 0.22 |
| Regimen | 0.21 |
| Nutrition Support | 0.19 |
| kephlevel | 0.13 |
| BMI | 0.03 |
| Height (Mtrs) | 0.02 |
| Health Facility | 0.02 |
| Sector | 0.00 |
| Sub County Registration Number | 0.00 |
| Sputum Smear Examination 0th Month Result | 0.00 |
| County | 0.00 |
| Zone | 0.00 |
| Treatment Outcome | 0.00 |
| Serial Number | 0.00 |
| Date of Registration | 0.00 |
| Month | 0.00 |
| Region | 0.00 |
| Sub County | 0.00 |
| Year | 0.00 |
| Quarter | 0.00 |
| Sex (M/F) | 0.00 |
| Age on Registration | 0.00 |
| Weight (Kgs) | 0.00 |
| BMI/Z-Score | 0.00 |
| DOT by | 0.00 |
| Type of TB(P/EP) | 0.00 |
| Type of patient | 0.00 |
| Date of treatment started | 0.00 |
| Referred BY: VCT/HCC/STI/HBC/PS/ANC/SR/CI | 0.00 |
| Referred TO: VCT/HCC/STI/HBC/PS/ANC | 0.00 |
| mfl_code | 0.00 |
dat2 %>%
count(County) %>%
mutate(
percentage = round(n / sum(n) * 100, 2)
) %>%
arrange(desc(n)) %>%
flextable() %>%
autofit()
County | n | percentage |
|---|---|---|
Nairobi | 74,676 | 14.33 |
Kiambu | 27,393 | 5.26 |
Meru | 24,892 | 4.78 |
Mombasa | 24,804 | 4.76 |
Nakuru | 20,595 | 3.95 |
Machakos | 15,811 | 3.04 |
Turkana | 15,398 | 2.96 |
Siaya | 14,472 | 2.78 |
Kitui | 14,399 | 2.76 |
Kisumu | 14,004 | 2.69 |
Kakamega | 13,793 | 2.65 |
Homa Bay | 13,728 | 2.64 |
Murang'a | 12,748 | 2.45 |
Kericho | 12,331 | 2.37 |
Kisii | 11,586 | 2.22 |
Makueni | 11,344 | 2.18 |
Kilifi | 11,102 | 2.13 |
Bomet | 10,616 | 2.04 |
Bungoma | 10,150 | 1.95 |
Uasin Gishu | 10,060 | 1.93 |
Migori | 9,950 | 1.91 |
Embu | 9,789 | 1.88 |
Kajiado | 9,591 | 1.84 |
Pokot | 9,483 | 1.82 |
Narok | 9,193 | 1.76 |
Nyeri | 8,720 | 1.67 |
Kirinyaga | 8,211 | 1.58 |
Trans Nzoia | 7,191 | 1.38 |
Busia | 6,562 | 1.26 |
Tharaka Nithi | 6,491 | 1.25 |
Laikipia | 5,966 | 1.15 |
Kwale | 5,822 | 1.12 |
Garissa | 5,661 | 1.09 |
Vihiga | 5,154 | 0.99 |
Baringo | 4,862 | 0.93 |
Nyamira | 4,853 | 0.93 |
Nandi | 4,722 | 0.91 |
Nyandarua | 4,268 | 0.82 |
Samburu | 4,134 | 0.79 |
Marsabit | 3,874 | 0.74 |
Mandera | 3,834 | 0.74 |
Isiolo | 3,783 | 0.73 |
Taita Taveta | 3,529 | 0.68 |
Wajir | 3,475 | 0.67 |
Elgeyo Marakwet | 3,422 | 0.66 |
Tana River | 2,597 | 0.50 |
Lamu | 1,913 | 0.37 |
1 | 0.00 |
# Summarize counts per county
county_summary <- dat2 %>%
count(County) %>%
mutate(
percentage = round(n / sum(n) * 100, 2)
) %>%
arrange(desc(n))
# Add a total row
# Summarize counts per county
county_summary <- dat2 %>%
count(County) %>%
mutate(
percentage = round(n / sum(n) * 100, 2),
total_cases = sum(n) # same total repeated for each row
) %>%
arrange(desc(n))
county_summary <- dat2 %>%
filter(!is.na(County)) %>% # remove rows with missing county
count(County) %>%
mutate(
total_cases = sum(n), # total TB cases repeated for each row
percentage = round(n / total_cases * 100, 2)
) %>%
arrange(desc(n)) %>%
select(County, n, total_cases, percentage) # reorder columns
# Print table with kableExtra
county_summary %>%
kable(
col.names = c("County", "Number of Patients", "Total TB Cases", "Percentage of Total"),
caption = "TB Cases Distribution by County"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)
| County | Number of Patients | Total TB Cases | Percentage of Total |
|---|---|---|---|
| Nairobi | 74676 | 520952 | 14.33 |
| Kiambu | 27393 | 520952 | 5.26 |
| Meru | 24892 | 520952 | 4.78 |
| Mombasa | 24804 | 520952 | 4.76 |
| Nakuru | 20595 | 520952 | 3.95 |
| Machakos | 15811 | 520952 | 3.04 |
| Turkana | 15398 | 520952 | 2.96 |
| Siaya | 14472 | 520952 | 2.78 |
| Kitui | 14399 | 520952 | 2.76 |
| Kisumu | 14004 | 520952 | 2.69 |
| Kakamega | 13793 | 520952 | 2.65 |
| Homa Bay | 13728 | 520952 | 2.64 |
| Murang’a | 12748 | 520952 | 2.45 |
| Kericho | 12331 | 520952 | 2.37 |
| Kisii | 11586 | 520952 | 2.22 |
| Makueni | 11344 | 520952 | 2.18 |
| Kilifi | 11102 | 520952 | 2.13 |
| Bomet | 10616 | 520952 | 2.04 |
| Bungoma | 10150 | 520952 | 1.95 |
| Uasin Gishu | 10060 | 520952 | 1.93 |
| Migori | 9950 | 520952 | 1.91 |
| Embu | 9789 | 520952 | 1.88 |
| Kajiado | 9591 | 520952 | 1.84 |
| Pokot | 9483 | 520952 | 1.82 |
| Narok | 9193 | 520952 | 1.76 |
| Nyeri | 8720 | 520952 | 1.67 |
| Kirinyaga | 8211 | 520952 | 1.58 |
| Trans Nzoia | 7191 | 520952 | 1.38 |
| Busia | 6562 | 520952 | 1.26 |
| Tharaka Nithi | 6491 | 520952 | 1.25 |
| Laikipia | 5966 | 520952 | 1.15 |
| Kwale | 5822 | 520952 | 1.12 |
| Garissa | 5661 | 520952 | 1.09 |
| Vihiga | 5154 | 520952 | 0.99 |
| Baringo | 4862 | 520952 | 0.93 |
| Nyamira | 4853 | 520952 | 0.93 |
| Nandi | 4722 | 520952 | 0.91 |
| Nyandarua | 4268 | 520952 | 0.82 |
| Samburu | 4134 | 520952 | 0.79 |
| Marsabit | 3874 | 520952 | 0.74 |
| Mandera | 3834 | 520952 | 0.74 |
| Isiolo | 3783 | 520952 | 0.73 |
| Taita Taveta | 3529 | 520952 | 0.68 |
| Wajir | 3475 | 520952 | 0.67 |
| Elgeyo Marakwet | 3422 | 520952 | 0.66 |
| Tana River | 2597 | 520952 | 0.50 |
| Lamu | 1913 | 520952 | 0.37 |
county_summary <- county_summary %>%
mutate(County = factor(County, levels = rev(County)),
n_thousands = n / 1000) # convert to thousands
ggplot(county_summary, aes(x = County, y = n_thousands, fill = County)) +
geom_col() +
scale_fill_grey(start = 0.3, end = 0.8) +
coord_flip() +
labs(
title = "TB Cases Distribution by County",
x = "County",
y = "Number of TB Patients (thousands)"
) +
theme_minimal(base_size = 14) +
theme(legend.position = "none") +
scale_y_continuous(expand = c(0,0))
ggplotly(p, tooltip = "text")
dat2 %>%
count(`Gender`) %>%
mutate(percentage = n / sum(n) * 100) %>%
flextable()
Gender | n | percentage |
|---|---|---|
Female | 183,455 | 35.21527 |
Male | 337,498 | 64.78473 |
dat3 <- dat2 %>%
mutate(
County = trimws(County), # remove extra spaces
County = case_when(
County == "Tharaka Nithi" ~ "Tharaka-Nithi",
County == "Elgeyo Marakwet" ~ "Elgeyo-Marakwet",
County == "Pokot" ~ "West Pokot",
TRUE ~ County
)
)
tb_calc %>%
select(
County,
Total_Population19,
TB_cases,
overall_percent,
prevalence_per_100k
) %>%
arrange(desc(TB_cases)) %>%
flextable() %>%
set_header_labels(
County = "County",
Total_Population19 = "Population (2019)",
TB_cases = "TB Cases",
overall_percent = "Overall (%)",
prevalence_per_100k = "Prevalence / 100,000"
) %>%
autofit()
County | Population (2019) | TB Cases | Overall (%) | Prevalence / 100,000 |
|---|---|---|---|---|
Nairobi | 4,397,073 | 74,676 | 14.33 | 1,698.3 |
Kiambu | 2,417,735 | 27,393 | 5.26 | 1,133.0 |
Meru | 1,545,714 | 24,892 | 4.78 | 1,610.4 |
Mombasa | 1,208,333 | 24,804 | 4.76 | 2,052.7 |
Nakuru | 2,162,202 | 20,595 | 3.95 | 952.5 |
Machakos | 1,421,932 | 15,811 | 3.04 | 1,111.9 |
Turkana | 926,976 | 15,398 | 2.96 | 1,661.1 |
Siaya | 993,183 | 14,472 | 2.78 | 1,457.1 |
Kitui | 1,136,187 | 14,399 | 2.76 | 1,267.3 |
Kisumu | 1,155,574 | 14,004 | 2.69 | 1,211.9 |
Kakamega | 1,867,579 | 13,793 | 2.65 | 738.5 |
Homa Bay | 1,131,950 | 13,728 | 2.64 | 1,212.8 |
Murang'a | 1,056,640 | 12,748 | 2.45 | 1,206.5 |
Kericho | 901,777 | 12,331 | 2.37 | 1,367.4 |
Kisii | 1,266,860 | 11,586 | 2.22 | 914.5 |
Makueni | 987,653 | 11,344 | 2.18 | 1,148.6 |
Kilifi | 1,453,787 | 11,102 | 2.13 | 763.7 |
Bomet | 875,689 | 10,616 | 2.04 | 1,212.3 |
Bungoma | 1,670,570 | 10,150 | 1.95 | 607.6 |
Uasin Gishu | 1,163,186 | 10,060 | 1.93 | 864.9 |
Migori | 1,116,436 | 9,950 | 1.91 | 891.2 |
Embu | 608,599 | 9,789 | 1.88 | 1,608.4 |
Kajiado | 1,117,840 | 9,591 | 1.84 | 858.0 |
West Pokot | 621,241 | 9,483 | 1.82 | 1,526.5 |
Narok | 1,157,873 | 9,193 | 1.76 | 794.0 |
Nyeri | 759,164 | 8,720 | 1.67 | 1,148.6 |
Kirinyaga | 610,411 | 8,211 | 1.58 | 1,345.2 |
Trans Nzoia | 990,341 | 7,191 | 1.38 | 726.1 |
Busia | 893,681 | 6,562 | 1.26 | 734.3 |
Tharaka-Nithi | 393,177 | 6,491 | 1.25 | 1,650.9 |
Laikipia | 518,580 | 5,966 | 1.15 | 1,150.4 |
Kwale | 866,820 | 5,822 | 1.12 | 671.7 |
Garissa | 841,353 | 5,661 | 1.09 | 672.8 |
Vihiga | 590,013 | 5,154 | 0.99 | 873.5 |
Baringo | 666,763 | 4,862 | 0.93 | 729.2 |
Nyamira | 605,576 | 4,853 | 0.93 | 801.4 |
Nandi | 885,711 | 4,722 | 0.91 | 533.1 |
Nyandarua | 638,289 | 4,268 | 0.82 | 668.7 |
Samburu | 310,327 | 4,134 | 0.79 | 1,332.1 |
Marsabit | 459,785 | 3,874 | 0.74 | 842.6 |
Mandera | 867,457 | 3,834 | 0.74 | 442.0 |
Isiolo | 268,002 | 3,783 | 0.73 | 1,411.6 |
Taita Taveta | 340,671 | 3,529 | 0.68 | 1,035.9 |
Wajir | 781,263 | 3,475 | 0.67 | 444.8 |
Elgeyo-Marakwet | 454,480 | 3,422 | 0.66 | 752.9 |
Tana River | 315,943 | 2,597 | 0.50 | 822.0 |
Lamu | 143,920 | 1,913 | 0.37 | 1,329.2 |
1 | 0.00 |
tb_sex_wide %>%
select(County, Female, Male, F_to_M_ratio) %>%
filter(!is.na(F_to_M_ratio)) %>%
arrange(desc(F_to_M_ratio)) %>%
mutate(
Female = round(Female, 0),
Male = round(Male, 0),
F_to_M_ratio = round(F_to_M_ratio, 2)
) %>%
kable(
col.names = c("County", "Female TB cases", "Male TB cases", "F:M Ratio"),
caption = "TB Cases by Gender and Female-to-Male Ratio by County (NAs removed)"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)
| County | Female TB cases | Male TB cases | F:M Ratio |
|---|---|---|---|
| Homa Bay | 6233 | 7495 | 0.83 |
| Siaya | 6489 | 7983 | 0.81 |
| Samburu | 1825 | 2309 | 0.79 |
| Marsabit | 1573 | 2301 | 0.68 |
| Nyamira | 1967 | 2886 | 0.68 |
| Tana River | 1053 | 1544 | 0.68 |
| Busia | 2614 | 3948 | 0.66 |
| Bungoma | 4001 | 6149 | 0.65 |
| Kisumu | 5514 | 8490 | 0.65 |
| Mandera | 1517 | 2317 | 0.65 |
| Kakamega | 5396 | 8397 | 0.64 |
| Turkana | 6013 | 9385 | 0.64 |
| Kilifi | 4269 | 6833 | 0.62 |
| Kwale | 2198 | 3624 | 0.61 |
| Migori | 3784 | 6166 | 0.61 |
| Pokot | 3603 | 5880 | 0.61 |
| Wajir | 1314 | 2161 | 0.61 |
| Isiolo | 1404 | 2379 | 0.59 |
| Kisii | 4292 | 7294 | 0.59 |
| Narok | 3423 | 5770 | 0.59 |
| Trans Nzoia | 2632 | 4559 | 0.58 |
| Nairobi | 26993 | 47683 | 0.57 |
| Bomet | 3818 | 6798 | 0.56 |
| Elgeyo Marakwet | 1215 | 2207 | 0.55 |
| Garissa | 2002 | 3659 | 0.55 |
| Kericho | 4315 | 8016 | 0.54 |
| Laikipia | 2093 | 3873 | 0.54 |
| Lamu | 673 | 1240 | 0.54 |
| Kajiado | 3322 | 6269 | 0.53 |
| Baringo | 1663 | 3199 | 0.52 |
| Uasin Gishu | 3456 | 6604 | 0.52 |
| Kiambu | 9101 | 18292 | 0.50 |
| Mombasa | 8238 | 16566 | 0.50 |
| Nakuru | 6826 | 13769 | 0.50 |
| Nandi | 1559 | 3163 | 0.49 |
| Taita Taveta | 1158 | 2371 | 0.49 |
| Tharaka Nithi | 2135 | 4356 | 0.49 |
| Nyandarua | 1383 | 2885 | 0.48 |
| Vihiga | 1616 | 3538 | 0.46 |
| Embu | 2944 | 6845 | 0.43 |
| Kitui | 4299 | 10100 | 0.43 |
| Machakos | 4730 | 11081 | 0.43 |
| Makueni | 3424 | 7920 | 0.43 |
| Nyeri | 2608 | 6112 | 0.43 |
| Meru | 6938 | 17954 | 0.39 |
| Murang’a | 3561 | 9187 | 0.39 |
| Kirinyaga | 2271 | 5940 | 0.38 |
| NA | 0 | 1 | 0.00 |
year_counts <- dat5 %>%
count(Year)
ggplot(year_counts, aes(x = Year, y = n)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
geom_vline(xintercept = 2020, linetype = "dashed") +
annotate("text", x = 2020, y = max(year_counts$n),
label = "COVID starts", hjust = -0.1) +
theme_minimal() +
labs(
title = "Time Trend with COVID Period Marker",
x = "Year",
y = "Number of Records"
)
tb_cases_covid <- dat5 %>%
group_by(County, covid_period) %>%
summarise(TB_cases = n(), .groups = "drop")
gender_county_presentable %>% flextable()
covid_period | County | Female | Male | F_to_M_ratio |
|---|---|---|---|---|
Post-COVID | Baringo | 927 | 1,624 | 0.57 |
Pre-COVID | Baringo | 736 | 1,575 | 0.47 |
Post-COVID | Bomet | 1,985 | 3,433 | 0.58 |
Pre-COVID | Bomet | 1,833 | 3,365 | 0.54 |
Post-COVID | Bungoma | 1,611 | 2,873 | 0.56 |
Pre-COVID | Bungoma | 2,390 | 3,276 | 0.73 |
Post-COVID | Busia | 1,054 | 1,756 | 0.60 |
Pre-COVID | Busia | 1,560 | 2,192 | 0.71 |
Post-COVID | Elgeyo-Marakwet | 564 | 1,096 | 0.51 |
Pre-COVID | Elgeyo-Marakwet | 651 | 1,111 | 0.59 |
Post-COVID | Embu | 1,501 | 3,408 | 0.44 |
Pre-COVID | Embu | 1,443 | 3,437 | 0.42 |
Post-COVID | Garissa | 955 | 1,738 | 0.55 |
Pre-COVID | Garissa | 1,047 | 1,921 | 0.55 |
Post-COVID | Homa Bay | 3,321 | 3,782 | 0.88 |
Pre-COVID | Homa Bay | 2,912 | 3,713 | 0.78 |
Post-COVID | Isiolo | 653 | 1,142 | 0.57 |
Pre-COVID | Isiolo | 751 | 1,237 | 0.61 |
Post-COVID | Kajiado | 1,534 | 3,125 | 0.49 |
Pre-COVID | Kajiado | 1,788 | 3,144 | 0.57 |
Post-COVID | Kakamega | 2,411 | 3,974 | 0.61 |
Pre-COVID | Kakamega | 2,985 | 4,423 | 0.67 |
Post-COVID | Kericho | 2,217 | 4,077 | 0.54 |
Pre-COVID | Kericho | 2,098 | 3,939 | 0.53 |
Post-COVID | Kiambu | 4,294 | 9,174 | 0.47 |
Pre-COVID | Kiambu | 4,807 | 9,118 | 0.53 |
Post-COVID | Kilifi | 1,792 | 3,125 | 0.57 |
Pre-COVID | Kilifi | 2,477 | 3,708 | 0.67 |
Post-COVID | Kirinyaga | 1,051 | 3,001 | 0.35 |
Pre-COVID | Kirinyaga | 1,220 | 2,939 | 0.42 |
Post-COVID | Kisii | 2,048 | 3,819 | 0.54 |
Pre-COVID | Kisii | 2,244 | 3,475 | 0.65 |
Post-COVID | Kisumu | 2,462 | 3,738 | 0.66 |
Pre-COVID | Kisumu | 3,052 | 4,752 | 0.64 |
Post-COVID | Kitui | 2,002 | 5,107 | 0.39 |
Pre-COVID | Kitui | 2,297 | 4,993 | 0.46 |
Post-COVID | Kwale | 1,017 | 1,775 | 0.57 |
Pre-COVID | Kwale | 1,181 | 1,849 | 0.64 |
Post-COVID | Laikipia | 920 | 1,775 | 0.52 |
Pre-COVID | Laikipia | 1,173 | 2,098 | 0.56 |
Post-COVID | Lamu | 339 | 635 | 0.53 |
Pre-COVID | Lamu | 334 | 605 | 0.55 |
Post-COVID | Machakos | 2,089 | 5,294 | 0.39 |
Pre-COVID | Machakos | 2,641 | 5,787 | 0.46 |
Post-COVID | Makueni | 1,508 | 3,665 | 0.41 |
Pre-COVID | Makueni | 1,916 | 4,255 | 0.45 |
Post-COVID | Mandera | 738 | 1,162 | 0.64 |
Pre-COVID | Mandera | 779 | 1,155 | 0.67 |
Post-COVID | Marsabit | 791 | 1,098 | 0.72 |
Pre-COVID | Marsabit | 782 | 1,203 | 0.65 |
Post-COVID | Meru | 3,136 | 8,725 | 0.36 |
Pre-COVID | Meru | 3,802 | 9,229 | 0.41 |
Post-COVID | Migori | 1,755 | 3,105 | 0.57 |
Pre-COVID | Migori | 2,029 | 3,061 | 0.66 |
Post-COVID | Mombasa | 4,127 | 8,359 | 0.49 |
Pre-COVID | Mombasa | 4,111 | 8,207 | 0.50 |
Post-COVID | Murang'a | 1,892 | 5,108 | 0.37 |
Pre-COVID | Murang'a | 1,669 | 4,079 | 0.41 |
Post-COVID | Nairobi | 12,486 | 22,480 | 0.56 |
Pre-COVID | Nairobi | 14,507 | 25,203 | 0.58 |
Post-COVID | Nakuru | 3,100 | 6,823 | 0.45 |
Pre-COVID | Nakuru | 3,726 | 6,946 | 0.54 |
Post-COVID | Nandi | 751 | 1,590 | 0.47 |
Pre-COVID | Nandi | 808 | 1,573 | 0.51 |
Post-COVID | Narok | 1,587 | 2,853 | 0.56 |
Pre-COVID | Narok | 1,836 | 2,917 | 0.63 |
Post-COVID | Nyamira | 1,018 | 1,584 | 0.64 |
Pre-COVID | Nyamira | 949 | 1,302 | 0.73 |
Post-COVID | Nyandarua | 593 | 1,347 | 0.44 |
Pre-COVID | Nyandarua | 790 | 1,538 | 0.51 |
Post-COVID | Nyeri | 1,312 | 3,121 | 0.42 |
Pre-COVID | Nyeri | 1,296 | 2,991 | 0.43 |
Post-COVID | Samburu | 910 | 1,176 | 0.77 |
Pre-COVID | Samburu | 915 | 1,133 | 0.81 |
Post-COVID | Siaya | 3,777 | 4,645 | 0.81 |
Pre-COVID | Siaya | 2,712 | 3,338 | 0.81 |
Post-COVID | Taita Taveta | 577 | 1,220 | 0.47 |
Pre-COVID | Taita Taveta | 581 | 1,151 | 0.50 |
Post-COVID | Tana River | 530 | 756 | 0.70 |
Pre-COVID | Tana River | 523 | 788 | 0.66 |
Post-COVID | Tharaka-Nithi | 926 | 2,109 | 0.44 |
Pre-COVID | Tharaka-Nithi | 1,209 | 2,247 | 0.54 |
Post-COVID | Trans Nzoia | 1,132 | 2,036 | 0.56 |
Pre-COVID | Trans Nzoia | 1,500 | 2,523 | 0.59 |
Post-COVID | Turkana | 3,159 | 5,023 | 0.63 |
Pre-COVID | Turkana | 2,854 | 4,362 | 0.65 |
Post-COVID | Uasin Gishu | 1,710 | 3,402 | 0.50 |
Pre-COVID | Uasin Gishu | 1,746 | 3,202 | 0.55 |
Post-COVID | Vihiga | 719 | 1,681 | 0.43 |
Pre-COVID | Vihiga | 897 | 1,857 | 0.48 |
Post-COVID | Wajir | 623 | 1,013 | 0.62 |
Pre-COVID | Wajir | 691 | 1,148 | 0.60 |
Post-COVID | West Pokot | 1,644 | 2,858 | 0.58 |
Pre-COVID | West Pokot | 1,959 | 3,022 | 0.65 |
##Objective 2
tb_outcome_year_wide <- tb_outcome_year %>%
select(Year, outcome, cases) %>%
pivot_wider(
names_from = outcome,
values_from = cases,
values_fill = 0
)
tb_outcome_year_wide %>% flextable()
Year | Cured | Died | Failed | Lost to follow-up | Missing | Not evaluated | Transferred out | Treatment completed |
|---|---|---|---|---|---|---|---|---|
2,017 | 31,228 | 4,956 | 273 | 4,238 | 1 | 375 | 3,745 | 34,152 |
2,018 | 36,195 | 6,430 | 430 | 5,501 | 0 | 502 | 1,878 | 47,231 |
2,019 | 36,351 | 5,618 | 515 | 5,090 | 0 | 380 | 1,283 | 40,923 |
2,020 | 32,613 | 5,379 | 450 | 4,165 | 0 | 609 | 896 | 32,406 |
2,021 | 34,345 | 5,044 | 488 | 4,154 | 0 | 541 | 468 | 36,402 |
2,022 | 38,330 | 5,495 | 626 | 4,539 | 0 | 1,096 | 552 | 45,060 |
ggplot(outcome_year, aes(x = factor(Year), y = cases, fill = outcome)) +
geom_col(width = 0.8) +
labs(
title = "TB Treatment Outcomes by Year",
x = "Year",
y = "Number of Patients",
fill = "Outcome"
) +
theme_minimal(base_size = 13) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1)
)
ggplot(success_year, aes(x = Year, y = success_rate)) +
geom_line(size = 1.2, color = "#1E8449") +
geom_point(size = 3, color = "#1E8449") +
geom_hline(yintercept = 90, linetype = "dotted", color = "grey40") +
labs(
title = "TB Treatment Success Rate by Year",
x = "Year",
y = "Success Rate (%)"
) +
theme_minimal(base_size = 13)
ggplot(covid_outcomes, aes(x = covid_period, y = percent, fill = outcome)) +
geom_col(width = 0.6) +
labs(
title = "TB Treatment Outcomes: Pre- vs Post-COVID",
x = "",
y = "Percentage of Patients",
fill = "Outcome"
) +
theme_minimal(base_size = 13)
p_death <- ggplot(death_year, aes(x = Year, y = n)) +
geom_line(size = 1.3, color = "#B03A2E") +
geom_point(size = 3, color = "#B03A2E") +
geom_vline(xintercept = 2020, linetype = "dashed", color = "grey40") +
labs(
title = "TB Mortality Trend by Year",
x = "Year",
y = "Number of Deaths"
) +
theme_tb
p_death
age_gender_death %>% flextable()
age_group | Gender | deaths | total | mortality_rate |
|---|---|---|---|---|
15-29 | Female | 6,973 | 104,849 | 6.7 |
15-29 | Male | 11,806 | 192,486 | 6.1 |
30-44 | Female | 1,755 | 26,341 | 6.7 |
30-44 | Male | 3,007 | 47,968 | 6.3 |
45-59 | Female | 568 | 8,655 | 6.6 |
45-59 | Male | 929 | 16,058 | 5.8 |
60+ | Female | 926 | 14,347 | 6.5 |
60+ | Male | 1,666 | 26,840 | 6.2 |
<15 | Female | 1,958 | 29,263 | 6.7 |
<15 | Male | 3,334 | 54,146 | 6.2 |
ggplot(age_gender_death, aes(x = age_group, y = mortality_rate, fill = Gender)) +
geom_col(position = "dodge") +
labs(
title = "Mortality Rate by Age Group and Gender",
x = "Age Group",
y = "Mortality Rate (%)",
fill = "Gender"
) +
theme_minimal(base_size = 14)
ggplot(monthly_deaths, aes(x = month, y = deaths)) +
geom_col(fill = "#C0392B") +
geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dashed", color = "grey40") +
annotate("text", x = as.Date("2020-03-01"), y = max(monthly_deaths$deaths),
label = "COVID onset", hjust = -0.1, vjust = -0.5, size = 4) +
labs(
title = "Monthly TB Deaths (Bar Plot)",
x = "Month",
y = "Number of Deaths"
) +
theme_minimal(base_size = 14) +
scale_x_date(date_labels = "%b-%Y", date_breaks = "2 months") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(monthly_deaths, aes(x = month, y = deaths)) +
geom_line(color = "#C0392B", size = 1.2) +
geom_point(color = "#C0392B", size = 2) +
geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dashed", color = "grey40") +
annotate("text", x = as.Date("2020-03-01"), y = max(monthly_deaths$deaths),
label = "COVID onset", hjust = -0.1, size = 4) +
labs(
title = "Monthly TB Deaths (Interrupted Time Series)",
x = "Month",
y = "Number of Deaths"
) +
theme_minimal(base_size = 14)
##Case fatality per 100,000 TB cases
cfr_age_gender <- dat8 %>%
group_by(age_group, Gender) %>%
summarise(
deaths = sum(died, na.rm = TRUE),
tb_cases = n(),
case_fatality_100k = round((deaths / tb_cases) * 100000, 1),
.groups = "drop"
)
cfr_age_gender %>% flextable()
age_group | Gender | deaths | tb_cases | case_fatality_100k |
|---|---|---|---|---|
15-29 | Female | 6,973 | 104,849 | 6,650.5 |
15-29 | Male | 11,806 | 192,486 | 6,133.4 |
30-44 | Female | 1,755 | 26,341 | 6,662.6 |
30-44 | Male | 3,007 | 47,968 | 6,268.8 |
45-59 | Female | 568 | 8,655 | 6,562.7 |
45-59 | Male | 929 | 16,058 | 5,785.3 |
60+ | Female | 926 | 14,347 | 6,454.3 |
60+ | Male | 1,666 | 26,840 | 6,207.2 |
<15 | Female | 1,958 | 29,263 | 6,691.0 |
<15 | Male | 3,334 | 54,146 | 6,157.4 |
ggplot(cfr_age_gender,
aes(x = age_group, y = case_fatality_100k, fill = Gender)) +
geom_col(position = "dodge") +
labs(
title = "TB Case Fatality per 100,000 Cases",
x = "Age Group",
y = "Deaths per 100,000 TB Cases",
fill = "Gender"
) +
theme_minimal(base_size = 14)
ggplot(tb_summary_period,
aes(x = period, y = percent, fill = followup_outcome)) +
geom_col(width = 0.7) +
labs(
title = "TB Treatment Follow-up Outcomes Before, During, and After COVID-19",
x = "Period",
y = "Percentage of patients",
fill = "Outcome"
) +
theme_minimal(base_size = 14)
completion_rate <- tb_outcomes_period %>%
mutate(completed = if_else(`Treatment Outcome` %in% c("C", "TC"), 1, 0)) %>%
group_by(period) %>%
summarise(
completed = sum(completed),
total = n(),
completion_rate = completed / total * 100,
.groups = "drop"
)
completion_rate %>% flextable()
period | completed | total | completion_rate |
|---|---|---|---|
COVID | 124,777 | 145,208 | 85.92984 |
Post-COVID | 83,589 | 95,941 | 87.12542 |
Pre-COVID | 236,870 | 279,804 | 84.65569 |
or_table_clean %>%
kable(
col.names = c("Variable", "Adjusted OR", "95% CI Lower", "95% CI Upper", "P-value"),
caption = "Adjusted Odds Ratios for TB Treatment Completion (Univariate Model)"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)
| Variable | Adjusted OR | 95% CI Lower | 95% CI Upper | P-value |
|---|---|---|---|---|
| Baseline (Pre-COVID, Female, <15 years) | 6.11 | 6.02 | 6.20 | 0 |
| Post-COVID vs Pre-COVID | 1.11 | 1.08 | 1.13 | 0 |
| periodPre-COVID | 0.90 | 0.89 | 0.92 | 0 |
or_multiv_clean %>%
mutate(
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
p.value = signif(p.value, 3)
) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
kable(
col.names = c(
"Variable",
"Adjusted OR",
"95% CI (Lower)",
"95% CI (Upper)",
"P-value"
),
caption = "Adjusted Odds Ratios for TB Treatment Completion"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = FALSE
)
| Variable | Adjusted OR | 95% CI (Lower) | 95% CI (Upper) | P-value |
|---|---|---|---|---|
| Baseline (Pre-COVID, Female, <15 years) | 7.95 | 7.57 | 8.36 | 0.00000 |
| COVID period vs Pre-COVID | 1.12 | 1.10 | 1.14 | 0.00000 |
| Post-COVID vs Pre-COVID | 1.24 | 1.22 | 1.27 | 0.00000 |
| Male vs Female | 0.87 | 0.86 | 0.89 | 0.00000 |
| Age 15–24 vs <15 | 1.08 | 1.02 | 1.14 | 0.00485 |
| Age 25–44 vs <15 | 0.73 | 0.70 | 0.77 | 0.00000 |
| Age 45–64 vs <15 | 0.68 | 0.64 | 0.71 | 0.00000 |
| Age ≥65 vs <15 | 0.53 | 0.50 | 0.56 | 0.00000 |